{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from cot import Collection\n",
    "from cot.stats import evaluation_as_table"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### generate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "coll = Collection.from_json(\"ts_67_empty\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration of the input and parameters of the language model \n",
    "config={\n",
    "    \"instruction_keys\": [\"zhou-01-ins\", \"qa-10\", \"qa-12\", \"qa-13\", \"qa-16\", \"qa-17\"],\n",
    "    \"cot_trigger_keys\": [None],\n",
    "    \"answer_extraction_keys\": 'auto-kojima', \n",
    "    \"author\" : \"thoughtsource\",\n",
    "    \"api_service\": \"openai_chat\",\n",
    "    \"api_time_interval\": 1,\n",
    "    \"engine\": \"gpt-3.5-turbo\", \n",
    "    \"temperature\": 0,\n",
    "    \"max_tokens\": 512,\n",
    "    \"verbose\": False,\n",
    "    \"warn\": False,\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generating commonsense_qa...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e7b56d4eadec45e39f26a1171d7ee67c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/67 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 81a5048059938c0b806dd96f5b8fe23a in your message.).\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generating med_qa...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "96817028fb9141c9b2d69d9ca9f631a1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/67 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIError: HTTP code 502 from API (<!DOCTYPE html>\n",
      "<!--[if lt IE 7]> <html class=\"no-js ie6 oldie\" lang=\"en-US\"> <![endif]-->\n",
      "<!--[if IE 7]>    <html class=\"no-js ie7 oldie\" lang=\"en-US\"> <![endif]-->\n",
      "<!--[if IE 8]>    <html class=\"no-js ie8 oldie\" lang=\"en-US\"> <![endif]-->\n",
      "<!--[if gt IE 8]><!--> <html class=\"no-js\" lang=\"en-US\"> <!--<![endif]-->\n",
      "<head>\n",
      "\n",
      "\n",
      "<title>api.openai.com | 502: Bad gateway</title>\n",
      "<meta charset=\"UTF-8\" />\n",
      "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n",
      "<meta http-equiv=\"X-UA-Compatible\" content=\"IE=Edge\" />\n",
      "<meta name=\"robots\" content=\"noindex, nofollow\" />\n",
      "<meta name=\"viewport\" content=\"width=device-width,initial-scale=1\" />\n",
      "<link rel=\"stylesheet\" id=\"cf_styles-css\" href=\"/cdn-cgi/styles/main.css\" />\n",
      "\n",
      "\n",
      "</head>\n",
      "<body>\n",
      "<div id=\"cf-wrapper\">\n",
      "\n",
      "    \n",
      "\n",
      "    <div id=\"cf-error-details\" class=\"p-0\">\n",
      "        <header class=\"mx-auto pt-10 lg:pt-6 lg:px-8 w-240 lg:w-full mb-8\">\n",
      "            <h1 class=\"inline-block sm:block sm:mb-2 font-light text-60 lg:text-4xl text-black-dark leading-tight mr-2\">\n",
      "              \n",
      "              <span class=\"inline-block\">Bad gateway</span>\n",
      "              <span class=\"code-label\">Error code 502</span>\n",
      "            </h1>\n",
      "            <div>\n",
      "               Visit <a href=\"https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_502&utm_campaign=api.openai.com\" target=\"_blank\" rel=\"noopener noreferrer\">cloudflare.com</a> for more information.\n",
      "            </div>\n",
      "            <div class=\"mt-3\">2023-04-12 11:37:49 UTC</div>\n",
      "        </header>\n",
      "        \n",
      "        <div class=\"my-8 bg-gradient-gray\">\n",
      "            <div class=\"w-240 lg:w-full mx-auto\">\n",
      "                <div class=\"clearfix md:px-8\">\n",
      "                  \n",
      "<div id=\"cf-browser-status\" class=\" relative w-1/3 md:w-full py-15 md:p-0 md:py-8 md:text-left md:border-solid md:border-0 md:border-b md:border-gray-400 overflow-hidden float-left md:float-none text-center\">\n",
      "  <div class=\"relative mb-10 md:m-0\">\n",
      "    \n",
      "    <span class=\"cf-icon-browser block md:hidden h-20 bg-center bg-no-repeat\"></span>\n",
      "    <span class=\"cf-icon-ok w-12 h-12 absolute left-1/2 md:left-auto md:right-0 md:top-0 -ml-6 -bottom-4\"></span>\n",
      "    \n",
      "  </div>\n",
      "  <span class=\"md:block w-full truncate\">You</span>\n",
      "  <h3 class=\"md:inline-block mt-3 md:mt-0 text-2xl text-gray-600 font-light leading-1.3\">\n",
      "    \n",
      "    Browser\n",
      "    \n",
      "  </h3>\n",
      "  <span class=\"leading-1.3 text-2xl text-green-success\">Working</span>\n",
      "</div>\n",
      "\n",
      "<div id=\"cf-cloudflare-status\" class=\" relative w-1/3 md:w-full py-15 md:p-0 md:py-8 md:text-left md:border-solid md:border-0 md:border-b md:border-gray-400 overflow-hidden float-left md:float-none text-center\">\n",
      "  <div class=\"relative mb-10 md:m-0\">\n",
      "    <a href=\"https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_502&utm_campaign=api.openai.com\" target=\"_blank\" rel=\"noopener noreferrer\">\n",
      "    <span class=\"cf-icon-cloud block md:hidden h-20 bg-center bg-no-repeat\"></span>\n",
      "    <span class=\"cf-icon-ok w-12 h-12 absolute left-1/2 md:left-auto md:right-0 md:top-0 -ml-6 -bottom-4\"></span>\n",
      "    </a>\n",
      "  </div>\n",
      "  <span class=\"md:block w-full truncate\">Vienna</span>\n",
      "  <h3 class=\"md:inline-block mt-3 md:mt-0 text-2xl text-gray-600 font-light leading-1.3\">\n",
      "    <a href=\"https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_502&utm_campaign=api.openai.com\" target=\"_blank\" rel=\"noopener noreferrer\">\n",
      "    Cloudflare\n",
      "    </a>\n",
      "  </h3>\n",
      "  <span class=\"leading-1.3 text-2xl text-green-success\">Working</span>\n",
      "</div>\n",
      "\n",
      "<div id=\"cf-host-status\" class=\"cf-error-source relative w-1/3 md:w-full py-15 md:p-0 md:py-8 md:text-left md:border-solid md:border-0 md:border-b md:border-gray-400 overflow-hidden float-left md:float-none text-center\">\n",
      "  <div class=\"relative mb-10 md:m-0\">\n",
      "    \n",
      "    <span class=\"cf-icon-server block md:hidden h-20 bg-center bg-no-repeat\"></span>\n",
      "    <span class=\"cf-icon-error w-12 h-12 absolute left-1/2 md:left-auto md:right-0 md:top-0 -ml-6 -bottom-4\"></span>\n",
      "    \n",
      "  </div>\n",
      "  <span class=\"md:block w-full truncate\">api.openai.com</span>\n",
      "  <h3 class=\"md:inline-block mt-3 md:mt-0 text-2xl text-gray-600 font-light leading-1.3\">\n",
      "    \n",
      "    Host\n",
      "    \n",
      "  </h3>\n",
      "  <span class=\"leading-1.3 text-2xl text-red-error\">Error</span>\n",
      "</div>\n",
      "\n",
      "                </div>\n",
      "              \n",
      "            </div>\n",
      "        </div>\n",
      "\n",
      "        <div class=\"w-240 lg:w-full mx-auto mb-8 lg:px-8\">\n",
      "            <div class=\"clearfix\">\n",
      "                <div class=\"w-1/2 md:w-full float-left pr-6 md:pb-10 md:pr-0 leading-relaxed\">\n",
      "                    <h2 class=\"text-3xl font-normal leading-1.3 mb-4\">What happened?</h2>\n",
      "                    <p>The web server reported a bad gateway error.</p>\n",
      "                </div>\n",
      "              \n",
      "                <div class=\"w-1/2 md:w-full float-left leading-relaxed\">\n",
      "                    <h2 class=\"text-3xl font-normal leading-1.3 mb-4\">What can I do?</h2>\n",
      "                    <p class=\"mb-6\">Please try again in a few minutes.</p>\n",
      "                </div>\n",
      "            </div>\n",
      "              \n",
      "        </div>\n",
      "\n",
      "        <div class=\"cf-error-footer cf-wrapper w-240 lg:w-full py-10 sm:py-4 sm:px-8 mx-auto text-center sm:text-left border-solid border-0 border-t border-gray-300\">\n",
      "  <p class=\"text-13\">\n",
      "    <span class=\"cf-footer-item sm:block sm:mb-1\">Cloudflare Ray ID: <strong class=\"font-semibold\">7b6b265d9cd63249</strong></span>\n",
      "    <span class=\"cf-footer-separator sm:hidden\">&bull;</span>\n",
      "    <span id=\"cf-footer-item-ip\" class=\"cf-footer-item hidden sm:block sm:mb-1\">\n",
      "      Your IP:\n",
      "      <button type=\"button\" id=\"cf-footer-ip-reveal\" class=\"cf-footer-ip-reveal-btn\">Click to reveal</button>\n",
      "      <span class=\"hidden\" id=\"cf-footer-ip\">193.171.177.145</span>\n",
      "      <span class=\"cf-footer-separator sm:hidden\">&bull;</span>\n",
      "    </span>\n",
      "    <span class=\"cf-footer-item sm:block sm:mb-1\"><span>Performance &amp; security by</span> <a rel=\"noopener noreferrer\" href=\"https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_502&utm_campaign=api.openai.com\" id=\"brand_link\" target=\"_blank\">Cloudflare</a></span>\n",
      "    \n",
      "  </p>\n",
      "  <script>(function(){function d(){var b=a.getElementById(\"cf-footer-item-ip\"),c=a.getElementById(\"cf-footer-ip-reveal\");b&&\"classList\"in b&&(b.classList.remove(\"hidden\"),c.addEventListener(\"click\",function(){c.classList.add(\"hidden\");a.getElementById(\"cf-footer-ip\").classList.remove(\"hidden\")}))}var a=document;document.addEventListener&&a.addEventListener(\"DOMContentLoaded\",d)})();</script>\n",
      "</div><!-- /.error-footer -->\n",
      "\n",
      "\n",
      "    </div>\n",
      "</div>\n",
      "</body>\n",
      "</html>\n",
      "\n",
      ").\n",
      "Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIError: HTTP code 502 from API (<!DOCTYPE html>\n",
      "<!--[if lt IE 7]> <html class=\"no-js ie6 oldie\" lang=\"en-US\"> <![endif]-->\n",
      "<!--[if IE 7]>    <html class=\"no-js ie7 oldie\" lang=\"en-US\"> <![endif]-->\n",
      "<!--[if IE 8]>    <html class=\"no-js ie8 oldie\" lang=\"en-US\"> <![endif]-->\n",
      "<!--[if gt IE 8]><!--> <html class=\"no-js\" lang=\"en-US\"> <!--<![endif]-->\n",
      "<head>\n",
      "\n",
      "\n",
      "<title>api.openai.com | 502: Bad gateway</title>\n",
      "<meta charset=\"UTF-8\" />\n",
      "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n",
      "<meta http-equiv=\"X-UA-Compatible\" content=\"IE=Edge\" />\n",
      "<meta name=\"robots\" content=\"noindex, nofollow\" />\n",
      "<meta name=\"viewport\" content=\"width=device-width,initial-scale=1\" />\n",
      "<link rel=\"stylesheet\" id=\"cf_styles-css\" href=\"/cdn-cgi/styles/main.css\" />\n",
      "\n",
      "\n",
      "</head>\n",
      "<body>\n",
      "<div id=\"cf-wrapper\">\n",
      "\n",
      "    \n",
      "\n",
      "    <div id=\"cf-error-details\" class=\"p-0\">\n",
      "        <header class=\"mx-auto pt-10 lg:pt-6 lg:px-8 w-240 lg:w-full mb-8\">\n",
      "            <h1 class=\"inline-block sm:block sm:mb-2 font-light text-60 lg:text-4xl text-black-dark leading-tight mr-2\">\n",
      "              \n",
      "              <span class=\"inline-block\">Bad gateway</span>\n",
      "              <span class=\"code-label\">Error code 502</span>\n",
      "            </h1>\n",
      "            <div>\n",
      "               Visit <a href=\"https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_502&utm_campaign=api.openai.com\" target=\"_blank\" rel=\"noopener noreferrer\">cloudflare.com</a> for more information.\n",
      "            </div>\n",
      "            <div class=\"mt-3\">2023-04-12 12:30:27 UTC</div>\n",
      "        </header>\n",
      "        \n",
      "        <div class=\"my-8 bg-gradient-gray\">\n",
      "            <div class=\"w-240 lg:w-full mx-auto\">\n",
      "                <div class=\"clearfix md:px-8\">\n",
      "                  \n",
      "<div id=\"cf-browser-status\" class=\" relative w-1/3 md:w-full py-15 md:p-0 md:py-8 md:text-left md:border-solid md:border-0 md:border-b md:border-gray-400 overflow-hidden float-left md:float-none text-center\">\n",
      "  <div class=\"relative mb-10 md:m-0\">\n",
      "    \n",
      "    <span class=\"cf-icon-browser block md:hidden h-20 bg-center bg-no-repeat\"></span>\n",
      "    <span class=\"cf-icon-ok w-12 h-12 absolute left-1/2 md:left-auto md:right-0 md:top-0 -ml-6 -bottom-4\"></span>\n",
      "    \n",
      "  </div>\n",
      "  <span class=\"md:block w-full truncate\">You</span>\n",
      "  <h3 class=\"md:inline-block mt-3 md:mt-0 text-2xl text-gray-600 font-light leading-1.3\">\n",
      "    \n",
      "    Browser\n",
      "    \n",
      "  </h3>\n",
      "  <span class=\"leading-1.3 text-2xl text-green-success\">Working</span>\n",
      "</div>\n",
      "\n",
      "<div id=\"cf-cloudflare-status\" class=\" relative w-1/3 md:w-full py-15 md:p-0 md:py-8 md:text-left md:border-solid md:border-0 md:border-b md:border-gray-400 overflow-hidden float-left md:float-none text-center\">\n",
      "  <div class=\"relative mb-10 md:m-0\">\n",
      "    <a href=\"https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_502&utm_campaign=api.openai.com\" target=\"_blank\" rel=\"noopener noreferrer\">\n",
      "    <span class=\"cf-icon-cloud block md:hidden h-20 bg-center bg-no-repeat\"></span>\n",
      "    <span class=\"cf-icon-ok w-12 h-12 absolute left-1/2 md:left-auto md:right-0 md:top-0 -ml-6 -bottom-4\"></span>\n",
      "    </a>\n",
      "  </div>\n",
      "  <span class=\"md:block w-full truncate\">Vienna</span>\n",
      "  <h3 class=\"md:inline-block mt-3 md:mt-0 text-2xl text-gray-600 font-light leading-1.3\">\n",
      "    <a href=\"https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_502&utm_campaign=api.openai.com\" target=\"_blank\" rel=\"noopener noreferrer\">\n",
      "    Cloudflare\n",
      "    </a>\n",
      "  </h3>\n",
      "  <span class=\"leading-1.3 text-2xl text-green-success\">Working</span>\n",
      "</div>\n",
      "\n",
      "<div id=\"cf-host-status\" class=\"cf-error-source relative w-1/3 md:w-full py-15 md:p-0 md:py-8 md:text-left md:border-solid md:border-0 md:border-b md:border-gray-400 overflow-hidden float-left md:float-none text-center\">\n",
      "  <div class=\"relative mb-10 md:m-0\">\n",
      "    \n",
      "    <span class=\"cf-icon-server block md:hidden h-20 bg-center bg-no-repeat\"></span>\n",
      "    <span class=\"cf-icon-error w-12 h-12 absolute left-1/2 md:left-auto md:right-0 md:top-0 -ml-6 -bottom-4\"></span>\n",
      "    \n",
      "  </div>\n",
      "  <span class=\"md:block w-full truncate\">api.openai.com</span>\n",
      "  <h3 class=\"md:inline-block mt-3 md:mt-0 text-2xl text-gray-600 font-light leading-1.3\">\n",
      "    \n",
      "    Host\n",
      "    \n",
      "  </h3>\n",
      "  <span class=\"leading-1.3 text-2xl text-red-error\">Error</span>\n",
      "</div>\n",
      "\n",
      "                </div>\n",
      "              \n",
      "            </div>\n",
      "        </div>\n",
      "\n",
      "        <div class=\"w-240 lg:w-full mx-auto mb-8 lg:px-8\">\n",
      "            <div class=\"clearfix\">\n",
      "                <div class=\"w-1/2 md:w-full float-left pr-6 md:pb-10 md:pr-0 leading-relaxed\">\n",
      "                    <h2 class=\"text-3xl font-normal leading-1.3 mb-4\">What happened?</h2>\n",
      "                    <p>The web server reported a bad gateway error.</p>\n",
      "                </div>\n",
      "              \n",
      "                <div class=\"w-1/2 md:w-full float-left leading-relaxed\">\n",
      "                    <h2 class=\"text-3xl font-normal leading-1.3 mb-4\">What can I do?</h2>\n",
      "                    <p class=\"mb-6\">Please try again in a few minutes.</p>\n",
      "                </div>\n",
      "            </div>\n",
      "              \n",
      "        </div>\n",
      "\n",
      "        <div class=\"cf-error-footer cf-wrapper w-240 lg:w-full py-10 sm:py-4 sm:px-8 mx-auto text-center sm:text-left border-solid border-0 border-t border-gray-300\">\n",
      "  <p class=\"text-13\">\n",
      "    <span class=\"cf-footer-item sm:block sm:mb-1\">Cloudflare Ray ID: <strong class=\"font-semibold\">7b6b737b292a3249</strong></span>\n",
      "    <span class=\"cf-footer-separator sm:hidden\">&bull;</span>\n",
      "    <span id=\"cf-footer-item-ip\" class=\"cf-footer-item hidden sm:block sm:mb-1\">\n",
      "      Your IP:\n",
      "      <button type=\"button\" id=\"cf-footer-ip-reveal\" class=\"cf-footer-ip-reveal-btn\">Click to reveal</button>\n",
      "      <span class=\"hidden\" id=\"cf-footer-ip\">193.171.177.145</span>\n",
      "      <span class=\"cf-footer-separator sm:hidden\">&bull;</span>\n",
      "    </span>\n",
      "    <span class=\"cf-footer-item sm:block sm:mb-1\"><span>Performance &amp; security by</span> <a rel=\"noopener noreferrer\" href=\"https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_502&utm_campaign=api.openai.com\" id=\"brand_link\" target=\"_blank\">Cloudflare</a></span>\n",
      "    \n",
      "  </p>\n",
      "  <script>(function(){function d(){var b=a.getElementById(\"cf-footer-item-ip\"),c=a.getElementById(\"cf-footer-ip-reveal\");b&&\"classList\"in b&&(b.classList.remove(\"hidden\"),c.addEventListener(\"click\",function(){c.classList.add(\"hidden\");a.getElementById(\"cf-footer-ip\").classList.remove(\"hidden\")}))}var a=document;document.addEventListener&&a.addEventListener(\"DOMContentLoaded\",d)})();</script>\n",
      "</div><!-- /.error-footer -->\n",
      "\n",
      "\n",
      "    </div>\n",
      "</div>\n",
      "</body>\n",
      "</html>\n",
      "\n",
      ").\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generating medmc_qa...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "17f6838a25994579865ba7d34d5776cc",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/67 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 8a48844259a545773f4b57d19df97caf in your message.).\n",
      "Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID c22c24a064f87399aebd5cd5ac356750 in your message.).\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generating open_book_qa...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "89b357542a2a47d09c789e58cdf01f3c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/67 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 24772bd2c4ba6b2f4b98130cfd1286a0 in your message.).\n",
      "Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIError: HTTP code 502 from API (<!DOCTYPE html>\n",
      "<!--[if lt IE 7]> <html class=\"no-js ie6 oldie\" lang=\"en-US\"> <![endif]-->\n",
      "<!--[if IE 7]>    <html class=\"no-js ie7 oldie\" lang=\"en-US\"> <![endif]-->\n",
      "<!--[if IE 8]>    <html class=\"no-js ie8 oldie\" lang=\"en-US\"> <![endif]-->\n",
      "<!--[if gt IE 8]><!--> <html class=\"no-js\" lang=\"en-US\"> <!--<![endif]-->\n",
      "<head>\n",
      "\n",
      "\n",
      "<title>api.openai.com | 502: Bad gateway</title>\n",
      "<meta charset=\"UTF-8\" />\n",
      "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n",
      "<meta http-equiv=\"X-UA-Compatible\" content=\"IE=Edge\" />\n",
      "<meta name=\"robots\" content=\"noindex, nofollow\" />\n",
      "<meta name=\"viewport\" content=\"width=device-width,initial-scale=1\" />\n",
      "<link rel=\"stylesheet\" id=\"cf_styles-css\" href=\"/cdn-cgi/styles/main.css\" />\n",
      "\n",
      "\n",
      "</head>\n",
      "<body>\n",
      "<div id=\"cf-wrapper\">\n",
      "\n",
      "    \n",
      "\n",
      "    <div id=\"cf-error-details\" class=\"p-0\">\n",
      "        <header class=\"mx-auto pt-10 lg:pt-6 lg:px-8 w-240 lg:w-full mb-8\">\n",
      "            <h1 class=\"inline-block sm:block sm:mb-2 font-light text-60 lg:text-4xl text-black-dark leading-tight mr-2\">\n",
      "              \n",
      "              <span class=\"inline-block\">Bad gateway</span>\n",
      "              <span class=\"code-label\">Error code 502</span>\n",
      "            </h1>\n",
      "            <div>\n",
      "               Visit <a href=\"https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_502&utm_campaign=api.openai.com\" target=\"_blank\" rel=\"noopener noreferrer\">cloudflare.com</a> for more information.\n",
      "            </div>\n",
      "            <div class=\"mt-3\">2023-04-12 14:57:53 UTC</div>\n",
      "        </header>\n",
      "        \n",
      "        <div class=\"my-8 bg-gradient-gray\">\n",
      "            <div class=\"w-240 lg:w-full mx-auto\">\n",
      "                <div class=\"clearfix md:px-8\">\n",
      "                  \n",
      "<div id=\"cf-browser-status\" class=\" relative w-1/3 md:w-full py-15 md:p-0 md:py-8 md:text-left md:border-solid md:border-0 md:border-b md:border-gray-400 overflow-hidden float-left md:float-none text-center\">\n",
      "  <div class=\"relative mb-10 md:m-0\">\n",
      "    \n",
      "    <span class=\"cf-icon-browser block md:hidden h-20 bg-center bg-no-repeat\"></span>\n",
      "    <span class=\"cf-icon-ok w-12 h-12 absolute left-1/2 md:left-auto md:right-0 md:top-0 -ml-6 -bottom-4\"></span>\n",
      "    \n",
      "  </div>\n",
      "  <span class=\"md:block w-full truncate\">You</span>\n",
      "  <h3 class=\"md:inline-block mt-3 md:mt-0 text-2xl text-gray-600 font-light leading-1.3\">\n",
      "    \n",
      "    Browser\n",
      "    \n",
      "  </h3>\n",
      "  <span class=\"leading-1.3 text-2xl text-green-success\">Working</span>\n",
      "</div>\n",
      "\n",
      "<div id=\"cf-cloudflare-status\" class=\" relative w-1/3 md:w-full py-15 md:p-0 md:py-8 md:text-left md:border-solid md:border-0 md:border-b md:border-gray-400 overflow-hidden float-left md:float-none text-center\">\n",
      "  <div class=\"relative mb-10 md:m-0\">\n",
      "    <a href=\"https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_502&utm_campaign=api.openai.com\" target=\"_blank\" rel=\"noopener noreferrer\">\n",
      "    <span class=\"cf-icon-cloud block md:hidden h-20 bg-center bg-no-repeat\"></span>\n",
      "    <span class=\"cf-icon-ok w-12 h-12 absolute left-1/2 md:left-auto md:right-0 md:top-0 -ml-6 -bottom-4\"></span>\n",
      "    </a>\n",
      "  </div>\n",
      "  <span class=\"md:block w-full truncate\">Vienna</span>\n",
      "  <h3 class=\"md:inline-block mt-3 md:mt-0 text-2xl text-gray-600 font-light leading-1.3\">\n",
      "    <a href=\"https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_502&utm_campaign=api.openai.com\" target=\"_blank\" rel=\"noopener noreferrer\">\n",
      "    Cloudflare\n",
      "    </a>\n",
      "  </h3>\n",
      "  <span class=\"leading-1.3 text-2xl text-green-success\">Working</span>\n",
      "</div>\n",
      "\n",
      "<div id=\"cf-host-status\" class=\"cf-error-source relative w-1/3 md:w-full py-15 md:p-0 md:py-8 md:text-left md:border-solid md:border-0 md:border-b md:border-gray-400 overflow-hidden float-left md:float-none text-center\">\n",
      "  <div class=\"relative mb-10 md:m-0\">\n",
      "    \n",
      "    <span class=\"cf-icon-server block md:hidden h-20 bg-center bg-no-repeat\"></span>\n",
      "    <span class=\"cf-icon-error w-12 h-12 absolute left-1/2 md:left-auto md:right-0 md:top-0 -ml-6 -bottom-4\"></span>\n",
      "    \n",
      "  </div>\n",
      "  <span class=\"md:block w-full truncate\">api.openai.com</span>\n",
      "  <h3 class=\"md:inline-block mt-3 md:mt-0 text-2xl text-gray-600 font-light leading-1.3\">\n",
      "    \n",
      "    Host\n",
      "    \n",
      "  </h3>\n",
      "  <span class=\"leading-1.3 text-2xl text-red-error\">Error</span>\n",
      "</div>\n",
      "\n",
      "                </div>\n",
      "              \n",
      "            </div>\n",
      "        </div>\n",
      "\n",
      "        <div class=\"w-240 lg:w-full mx-auto mb-8 lg:px-8\">\n",
      "            <div class=\"clearfix\">\n",
      "                <div class=\"w-1/2 md:w-full float-left pr-6 md:pb-10 md:pr-0 leading-relaxed\">\n",
      "                    <h2 class=\"text-3xl font-normal leading-1.3 mb-4\">What happened?</h2>\n",
      "                    <p>The web server reported a bad gateway error.</p>\n",
      "                </div>\n",
      "              \n",
      "                <div class=\"w-1/2 md:w-full float-left leading-relaxed\">\n",
      "                    <h2 class=\"text-3xl font-normal leading-1.3 mb-4\">What can I do?</h2>\n",
      "                    <p class=\"mb-6\">Please try again in a few minutes.</p>\n",
      "                </div>\n",
      "            </div>\n",
      "              \n",
      "        </div>\n",
      "\n",
      "        <div class=\"cf-error-footer cf-wrapper w-240 lg:w-full py-10 sm:py-4 sm:px-8 mx-auto text-center sm:text-left border-solid border-0 border-t border-gray-300\">\n",
      "  <p class=\"text-13\">\n",
      "    <span class=\"cf-footer-item sm:block sm:mb-1\">Cloudflare Ray ID: <strong class=\"font-semibold\">7b6c4b1f5b1dc2f2</strong></span>\n",
      "    <span class=\"cf-footer-separator sm:hidden\">&bull;</span>\n",
      "    <span id=\"cf-footer-item-ip\" class=\"cf-footer-item hidden sm:block sm:mb-1\">\n",
      "      Your IP:\n",
      "      <button type=\"button\" id=\"cf-footer-ip-reveal\" class=\"cf-footer-ip-reveal-btn\">Click to reveal</button>\n",
      "      <span class=\"hidden\" id=\"cf-footer-ip\">193.171.177.145</span>\n",
      "      <span class=\"cf-footer-separator sm:hidden\">&bull;</span>\n",
      "    </span>\n",
      "    <span class=\"cf-footer-item sm:block sm:mb-1\"><span>Performance &amp; security by</span> <a rel=\"noopener noreferrer\" href=\"https://www.cloudflare.com/5xx-error-landing?utm_source=errorcode_502&utm_campaign=api.openai.com\" id=\"brand_link\" target=\"_blank\">Cloudflare</a></span>\n",
      "    \n",
      "  </p>\n",
      "  <script>(function(){function d(){var b=a.getElementById(\"cf-footer-item-ip\"),c=a.getElementById(\"cf-footer-ip-reveal\");b&&\"classList\"in b&&(b.classList.remove(\"hidden\"),c.addEventListener(\"click\",function(){c.classList.add(\"hidden\");a.getElementById(\"cf-footer-ip\").classList.remove(\"hidden\")}))}var a=document;document.addEventListener&&a.addEventListener(\"DOMContentLoaded\",d)})();</script>\n",
      "</div><!-- /.error-footer -->\n",
      "\n",
      "\n",
      "    </div>\n",
      "</div>\n",
      "</body>\n",
      "</html>\n",
      "\n",
      ").\n",
      "Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 487f3b86c4eca403b8fdc37af8361c52 in your message.).\n",
      "Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 876bd4d404497a4f82b9b6e7f6c45ce2 in your message.).\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generating strategy_qa...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e181991c6cc44c21aa1dd5b45b813f13",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/67 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 4f5e7c645aa41d52d42d9bfa5cd8531e in your message.).\n",
      "Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 0fff84ceccd5717a235a0129a172de21 in your message.).\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generating worldtree...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "70ffd0cb75b840b9b8ee9b6d45a80ead",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/67 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 1e012b769acc2d04238abf924446bbcc in your message.).\n",
      "Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 1776c75e6214cef9a5fbd5f6f101608a in your message.).\n"
     ]
    }
   ],
   "source": [
    "coll.generate(config=config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "546ece73eb9c49f5a045e9b9cd439072",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/67 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6461e0730ec74023910576aaa5b4a92a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/67 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "56e99e7cb3bb424c853a294a284abf1a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/67 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4a13da22ad62402a9f2798c6c079a7b3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/67 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e022b753a8634df2ab0dd386acf02f83",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/67 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6ed4118a876942eba83833d6f3e6e5be",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/67 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "{'commonsense_qa': {'validation': {'accuracy': {'gpt-3.5-turbo': {'None_None_kojima-A-E': 0.761194,\n",
       "     'None_kojima-01_kojima-A-E': 0.716418,\n",
       "     'None_zhou-01_kojima-A-E': 0.776119}}}},\n",
       " 'med_qa': {'test': {'accuracy': {'gpt-3.5-turbo': {'None_None_kojima-A-E': 0.522388,\n",
       "     'None_kojima-01_kojima-A-E': 0.567164,\n",
       "     'None_zhou-01_kojima-A-E': 0.567164}}}},\n",
       " 'medmc_qa': {'validation': {'accuracy': {'gpt-3.5-turbo': {'None_None_kojima-A-D': 0.537313,\n",
       "     'None_kojima-01_kojima-A-D': 0.522388,\n",
       "     'None_zhou-01_kojima-A-D': 0.507463}}}},\n",
       " 'open_book_qa': {'test': {'accuracy': {'gpt-3.5-turbo': {'None_None_kojima-A-D': 0.791045,\n",
       "     'None_kojima-01_kojima-A-D': 0.80597,\n",
       "     'None_zhou-01_kojima-A-D': 0.701493}}}},\n",
       " 'strategy_qa': {'train': {'accuracy': {'gpt-3.5-turbo': {'None_None_kojima-yes-no': 0.61194,\n",
       "     'None_kojima-01_kojima-yes-no': 0.626866,\n",
       "     'None_zhou-01_kojima-yes-no': 0.641791}}}},\n",
       " 'worldtree': {'test': {'accuracy': {'gpt-3.5-turbo': {'None_None_kojima-A-D': 0.955224,\n",
       "     'None_kojima-01_kojima-A-D': 0.895522,\n",
       "     'None_zhou-01_kojima-A-D': 0.895522}}}}}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "coll.evaluate()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# join strings from a list with underscore\n",
    "def join_strings(list_of_strings):\n",
    "    if list_of_strings is None:\n",
    "        list_of_strings = [\"None\"]\n",
    "    if isinstance(list_of_strings, str):\n",
    "        list_of_strings = [list_of_strings]\n",
    "    joined_string = \"\"\n",
    "    for string in list_of_strings:\n",
    "        joined_string += (\"-\" + str(string))\n",
    "    # delete first underscore\n",
    "    joined_string = joined_string[1:]\n",
    "    return(joined_string)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e7088a6e41a84068ba9c33c61b8ae8b9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e33e1f5a93fc4360aaba7758ba61464f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "59bdf2189cc745b598bcdd85aeedd50b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b04181827a5d429a9d0511d72008f4e3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f68801209f624da8be02f95391bfa268",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f42b777a31714119ba481babcebe6eb4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "coll.dump(\"thoughtsource_67\" + \"_\" + config['api_service'] + \"_\" + config['engine'].replace(\"/\", \"_\") + \"_\" + join_strings(config[\"instruction_keys\"]) + \"_\" + join_strings(config[\"cot_trigger_keys\"]) + \".json\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### merge into"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "ts_67 = Collection.from_json(\"thoughtsource_67.json\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a1944587a3b9432690a18ab324d50261",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "556b598370594908967396f3af6adfd6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bcaf696d35ca419bb16d2b0a54d39e95",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "062a09c8ece14d19bb3eec3dc2cc5b06",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "dab50687bf6b4d11a968690a07b664ce",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a8484aeef6a24958adb461d791915a2b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e33316048f6540ff9a058803a5c0bea2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bfeb664d21c545979e83f2ea03a4f86d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cbbbcbb9944b488c97fb98393a7b3b86",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3e75e870578d4706ae28585ccf558a3d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "895da66b75034d948c9928764037bbf9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "922cceac55094cafbf9a08a2d6b87d29",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "ts_67 = ts_67.merge(coll)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "67c4bbc5576e46d8b003db16565de96f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6e4ac927473341d9928187e45b675076",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e0fcbf8b68fc47cc9ce8778640f83038",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3a5f575937104334979c6f59dc82bc1a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7b60f1290c7140058a176c6d1c97d2a4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "88c1b9ea69264f76ab6c2d7fe4642fc2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "ts_67.dump(\"thoughtsource_67.json\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### evaluate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "coll = Collection.from_json(\"thoughtsource_67.json\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# coll.select_generated_cots(author='thoughtsource', model=[\"gpt-3.5-turbo\", \"flan-T5-xxl\"], cot_trigger=None, instruction=None)\n",
    "coll.select_generated_cots(author='thoughtsource')\n",
    "coll.select_generated_cots(model='gpt-3.5-turbo')\n",
    "# coll.select_generated_cots(cot_trigger=[None,'zhou-01'])\n",
    "# coll.select_generated_cots(instruction=[None])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "eval = coll.evaluate()\n",
    "table = evaluation_as_table(eval)\n",
    "table"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "a19cb823e24c98ee05a9cfa4a3a579b5d56d4c1a735f2a12456750b95a1e155e"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
